RNA-Seq: expression comparison of lit, old and new data

Libraries required

library(plotly)
library(edgeR)
library(pheatmap)
library(viridis)
library(plyr)

Data

load("input/SC_controls_rnaseq_salmon.tds.RData")
lab <- salmon

load("input/SC_controls_rnaseq_lit_salmon.tds.RData")
lit <- salmon

load("input/SC_controls_rnaseq_lab_june2021.tds.RData")
ns <- salmon

Expression comparison of PND7/8

plot_expr <- function(s1, s2, p1, p2, n1, n2, num = 5000) {
  library(robcor)
  dat1 <- log2(sort(rowMeans(
    s1@gene.counts[, grep(pattern = p1, x = colnames(s1@gene.counts), value = T)]
  ), decreasing = TRUE))

  if(p2 == "PND14" | p2 == "PNW8"){
  dat2 <- log2(sort((
    s2@gene.counts[, grep(pattern = p2, x = colnames(s2@gene.counts), value = T)]
  ), decreasing = TRUE))
  } else{
  dat2 <- log2(sort(rowMeans(
    s2@gene.counts[, grep(pattern = p2, x = colnames(s2@gene.counts), value = T)]
  ), decreasing = TRUE))
}
  genes <- union(names(dat1)[1:num], names(dat2)[1:num])

  x <- dat1[genes]
  y <- dat2[genes]

  x[is.infinite(x)] <- 0
  y[is.infinite(y)] <- 0
  
  fit <- lm(y ~ x)
  fitr <- MASS::rlm(y ~ x)
  
  cor <- cor.test(x,y)
  cr <- robcor(x,y)
  
  tl <- paste0(
    "r = ", round(cor$estimate, 2),
    "; p", ifelse(cor$p.value > 0.001, paste(" =", cor$p.value), " < 0.001"),
    " robcor: ", round(cr, 2)
    )

  plot_ly() %>%
    add_trace(x = x, y = y, mode = "markers", name = "Genes") %>%
    add_trace(x = x, y = predict(fit), mode = "lines", name = "lm") %>%
    add_trace(x = x, y = predict(fitr), mode = "lines", name = "rlm") %>%
    layout(
      xaxis = list(title = n1),
      yaxis = list(title = n2),
      title = paste("Pearson's correlation:", tl)
    )
}

PND7 vs PND8 (old)

plot_expr(s1 = lab, s2 = lit, p1 = "PND8", p2 = "PND7", n1 = "PND8 (old)", n2 = "PND7 (lit)")

PND7 vs PND8 (new)

plot_expr(s1 = ns, s2 = lit, p1 = "PND8", p2 = "PND7", n1 = "PND8 (new)", n2 = "PND7 (lit)")

PND8 (old) vs PND8 (new)

plot_expr(s1 = ns, s2 = lab, p1 = "PND8", p2 = "PND8", n1 = "PND8 (new)", n2 = "PND8 (old)")

Expression comparison of PND14/15

PND14 vs PND15 (old)

plot_expr(s1 = lab, s2 = lit, p1 = "PND15", p2 = "PND14", n1 = "PND15 (old)", n2 = "PND14 (lit)")

PND14 vs PND15 (new)

plot_expr(s1 = ns, s2 = lit, p1 = "PND15", p2 = "PND14", n1 = "PND15 (new)", n2 = "PND14 (lit)")

PND15 (old) vs PND15 (new)

plot_expr(s1 = ns, s2 = lab, p1 = "PND15", p2 = "PND15", n1 = "PND15 (new)", n2 = "PND15 (old)")

Expression comparison of PNW8/21

PNW8 vs PNW21

plot_expr(s1 = ns, s2 = lit, p1 = "PNW21", p2 = "PNW8", n1 = "PNW21 (new)", n2 = "PNW8 (lit)")

Heatmap of some markers

genes <- data.frame(
  Category = c(
    rep("Germ cell licensing", 5),
    rep("Undifferentiated Spermatogonia markers", 12),
    "Sertoli cell marker",
    "Leyidig cell marker",
    "Translational initiation factor",
    "Translational initiation factor",
    "Housekeeping gene"
  ),
  Genes = c(
    "Nanos2", "Nanos3", "Dazl", "Ddx4", "Dmrt1",
    "Taf4b", "Neurog3", "Utf1", "Id4", "Zbtb16", "Pou5f1",
    "Lin28a", "Bcl6b", "Sall4", "Sohlh1", "Foxo1", "Gfra1",
    "Gata4", "Lhcgr",
    "Eif4h", "Eif4e", "Gapdh"
  ), stringsAsFactors = F
)
rownames(genes) <- genes$Genes

genes1 <- data.frame(
  Category = c(
    rep("Stem cell markers", 5),
    rep("Germline pluripotency marker", 7),
    rep("Progenitor-associated marker", 3),
    "Leyidig cell marker",
    rep("Sertoli cell marker", 2)
  ),
  Genes = c(
    "Lhx1", "Id4", "Gfra1", "Etv5", "Ret",
    "Bcl6b", "Foxo1", "Sall4", "Dmrt1", "Zbtb16", "Dazl", "Ddx4",
    "Neurog3", "Sohlh1", "Lin28b",
    "Lhcgr",
    "Gata4", "Vim"
  ), stringsAsFactors = F
)
rownames(genes1) <- genes1$Genes

genes <- genes[!rownames(genes) %in% intersect(genes1$Genes, genes$Genes),]
genes <- rbind(genes, genes1)

genes$Category <- factor(genes$Category, unique(genes$Category)[c(2,5,6,1,7,8,9,3,4)])

genes_sp <- split(genes, genes$Category)
genes <- ldply(genes_sp[unique(genes$Category)[c(2,5,6,1,7,8,9,3,4)]], .id = NULL)

rownames(genes) <- genes$Genes

tab <- data.frame(lit@gene.counts, lab@gene.counts, ns@gene.counts)
tab <- DGEList(tab)
tab <- calcNormFactors(tab)
cpm <- cpm(tab, log = T)

tab <- cpm[genes$Genes, ]
tab[mapply(is.infinite, tab)] <- 0

tab <- tab[, c(
  grep(pattern = "PND7", x = colnames(tab), value = T),
  grep(pattern = "PND8_[0-9][0-9]$", x = colnames(tab), value = T),
  grep(pattern = "PND8_[0-9][0-9]_", x = colnames(tab), value = T),
  grep(pattern = "PND14", x = colnames(tab), value = T),
  grep(pattern = "PND15_[0-9][0-9]$", x = colnames(tab), value = T),
  grep(pattern = "PND15_[0-9][0-9]_", x = colnames(tab), value = T),
  grep(pattern = "PNW8", x = colnames(tab), value = T),
  grep(pattern = "PNW21", x = colnames(tab), value = T),
  grep(pattern = "Adult", x = colnames(tab), value = T)
)]


lit@phenoData$Source <- "Literature"
## Loading required package: plgINS
## Setting options('download.file.method.GEOquery'='auto')
## Setting options('GEOquery.inmemory.gpl'=FALSE)
## Warning: replacing previous import 'GenomicRanges::shift' by 'data.table::shift'
## when loading 'plgINS'
lab@phenoData$Source <- "Lab (old)"
annoCol <- rbind(lab@phenoData, lit@phenoData)
annoCol$LibPrepBatch <- NA

pn <- ns@phenoData
colnames(pn)[3] <- "Samples"
pn$Source <- "Lab (new)"
annoCol <- rbind(annoCol, pn)
annoCol$seqType <- "Ribo0"
annoCol$seqType[annoCol$Group == "Adult"] <- "polyA"
annoCol$Group[annoCol$Group == "Adult"] <- "PNW21"

annoCol$Group <- factor(annoCol$Group, unique(annoCol$Group)[c(5,3,4,2,6,1)])

Heatmap

paletteLength <- 100
myColor <- colorRampPalette(rev(RColorBrewer::brewer.pal(11, "RdBu")))(paletteLength)
myBreaks <- c(seq(min(tab), 0, length.out=ceiling(paletteLength/2) + 1), 
              seq(max(tab)/paletteLength, max(tab), length.out=floor(paletteLength/2)))

pheatmap(
  mat = tab, color =  myColor, border_color = NA, breaks = myBreaks,
  cluster_cols = F, cluster_rows = F,
  gaps_col = c(16, 32),
  gaps_row = c(4, 9,16, 18,21, 22, 24,26),
  annotation_row = genes[, 1, drop = F],
  annotation_col = annoCol[, c(1,3:5), drop = F], show_colnames = FALSE,
)

SessionInfo

devtools::session_info()
## ─ Session info ───────────────────────────────────────────────────────────────
##  setting  value                       
##  version  R version 4.0.4 (2021-02-15)
##  os       Ubuntu 16.04.7 LTS          
##  system   x86_64, linux-gnu           
##  ui       X11                         
##  language (EN)                        
##  collate  en_US.UTF-8                 
##  ctype    en_US.UTF-8                 
##  tz       Europe/Zurich               
##  date     2021-06-25                  
## 
## ─ Packages ───────────────────────────────────────────────────────────────────
##  package              * version  date       lib source        
##  annotate               1.68.0   2020-10-27 [1] Bioconductor  
##  AnnotationDbi          1.52.0   2020-10-27 [1] Bioconductor  
##  assertthat             0.2.1    2019-03-21 [1] CRAN (R 4.0.4)
##  Biobase                2.50.0   2020-10-27 [1] Bioconductor  
##  BiocGenerics           0.36.1   2021-04-16 [1] Bioconductor  
##  BiocParallel           1.24.1   2020-11-06 [1] Bioconductor  
##  Biostrings             2.58.0   2020-10-27 [1] Bioconductor  
##  bit                    4.0.4    2020-08-04 [1] CRAN (R 4.0.4)
##  bit64                  4.0.5    2020-08-30 [1] CRAN (R 4.0.4)
##  bitops                 1.0-7    2021-04-24 [1] CRAN (R 4.0.4)
##  blob                   1.2.1    2020-01-20 [1] CRAN (R 4.0.4)
##  bookdown               0.22     2021-04-22 [1] CRAN (R 4.0.4)
##  bslib                  0.2.4    2021-01-25 [1] CRAN (R 4.0.4)
##  cachem                 1.0.4    2021-02-13 [1] CRAN (R 4.0.4)
##  callr                  3.7.0    2021-04-20 [1] CRAN (R 4.0.4)
##  cli                    2.5.0    2021-04-26 [1] CRAN (R 4.0.4)
##  colorspace             2.0-0    2020-11-11 [1] CRAN (R 4.0.4)
##  crayon                 1.4.1    2021-02-08 [1] CRAN (R 4.0.4)
##  crosstalk              1.1.1    2021-01-12 [1] CRAN (R 4.0.4)
##  data.table             1.14.0   2021-02-21 [1] CRAN (R 4.0.4)
##  DBI                    1.1.1    2021-01-15 [1] CRAN (R 4.0.4)
##  DelayedArray           0.16.3   2021-03-24 [1] Bioconductor  
##  desc                   1.3.0    2021-03-05 [1] CRAN (R 4.0.4)
##  DESeq2                 1.30.1   2021-02-19 [1] Bioconductor  
##  devtools               2.4.2    2021-06-07 [1] CRAN (R 4.0.4)
##  digest                 0.6.27   2020-10-24 [1] CRAN (R 4.0.4)
##  dplyr                  1.0.5    2021-03-05 [1] CRAN (R 4.0.4)
##  edgeR                * 3.32.1   2021-01-14 [1] Bioconductor  
##  ellipsis               0.3.1    2020-05-15 [1] CRAN (R 4.0.4)
##  evaluate               0.14     2019-05-28 [1] CRAN (R 4.0.4)
##  fansi                  0.4.2    2021-01-15 [1] CRAN (R 4.0.4)
##  farver                 2.1.0    2021-02-28 [1] CRAN (R 4.0.4)
##  fastmap                1.1.0    2021-01-25 [1] CRAN (R 4.0.4)
##  fs                     1.5.0    2020-07-31 [1] CRAN (R 4.0.4)
##  genefilter             1.72.1   2021-01-21 [1] Bioconductor  
##  geneplotter            1.68.0   2020-10-27 [1] Bioconductor  
##  generics               0.1.0    2020-10-31 [1] CRAN (R 4.0.4)
##  GenomeInfoDb           1.26.7   2021-04-08 [1] Bioconductor  
##  GenomeInfoDbData       1.2.4    2021-03-30 [1] Bioconductor  
##  GenomicRanges          1.42.0   2020-10-27 [1] Bioconductor  
##  GEOquery               2.58.0   2020-10-27 [1] Bioconductor  
##  ggplot2              * 3.3.3    2020-12-30 [1] CRAN (R 4.0.4)
##  glue                   1.4.2    2020-08-27 [1] CRAN (R 4.0.4)
##  gridExtra              2.3      2017-09-09 [1] CRAN (R 4.0.4)
##  gtable                 0.3.0    2019-03-25 [1] CRAN (R 4.0.4)
##  highr                  0.9      2021-04-16 [1] CRAN (R 4.0.4)
##  hms                    1.0.0    2021-01-13 [1] CRAN (R 4.0.4)
##  htmltools              0.5.1.1  2021-01-22 [1] CRAN (R 4.0.4)
##  htmlwidgets            1.5.3    2020-12-10 [1] CRAN (R 4.0.4)
##  httr                   1.4.2    2020-07-20 [1] CRAN (R 4.0.4)
##  IRanges                2.24.1   2020-12-12 [1] Bioconductor  
##  jquerylib              0.1.4    2021-04-26 [1] CRAN (R 4.0.4)
##  jsonlite               1.7.2    2020-12-09 [1] CRAN (R 4.0.4)
##  knitr                  1.33     2021-04-24 [1] CRAN (R 4.0.4)
##  lattice                0.20-41  2020-04-02 [1] CRAN (R 4.0.4)
##  lazyeval               0.2.2    2019-03-15 [1] CRAN (R 4.0.4)
##  lifecycle              1.0.0    2021-02-15 [1] CRAN (R 4.0.4)
##  limma                * 3.46.0   2020-10-27 [1] Bioconductor  
##  locfit                 1.5-9.4  2020-03-25 [1] CRAN (R 4.0.4)
##  magrittr               2.0.1    2020-11-17 [1] CRAN (R 4.0.4)
##  MASS                   7.3-53.1 2021-02-12 [1] CRAN (R 4.0.4)
##  Matrix                 1.3-2    2021-01-06 [1] CRAN (R 4.0.4)
##  MatrixGenerics         1.2.1    2021-01-30 [1] Bioconductor  
##  matrixStats            0.58.0   2021-01-29 [1] CRAN (R 4.0.4)
##  memoise                2.0.0    2021-01-26 [1] CRAN (R 4.0.4)
##  munsell                0.5.0    2018-06-12 [1] CRAN (R 4.0.4)
##  pheatmap             * 1.0.12   2019-01-04 [1] CRAN (R 4.0.4)
##  pillar                 1.6.0    2021-04-13 [1] CRAN (R 4.0.4)
##  pkgbuild               1.2.0    2020-12-15 [1] CRAN (R 4.0.4)
##  pkgconfig              2.0.3    2019-09-22 [1] CRAN (R 4.0.4)
##  pkgload                1.2.1    2021-04-06 [1] CRAN (R 4.0.4)
##  plgINS               * 0.1.5    2021-05-03 [1] local         
##  plotly               * 4.9.3    2021-01-10 [1] CRAN (R 4.0.4)
##  plyr                 * 1.8.6    2020-03-03 [1] CRAN (R 4.0.4)
##  prettyunits            1.1.1    2020-01-24 [1] CRAN (R 4.0.4)
##  processx               3.5.1    2021-04-04 [1] CRAN (R 4.0.4)
##  ps                     1.6.0    2021-02-28 [1] CRAN (R 4.0.4)
##  purrr                  0.3.4    2020-04-17 [1] CRAN (R 4.0.4)
##  R6                     2.5.0    2020-10-28 [1] CRAN (R 4.0.4)
##  RColorBrewer           1.1-2    2014-12-07 [1] CRAN (R 4.0.4)
##  Rcpp                   1.0.6    2021-01-15 [1] CRAN (R 4.0.4)
##  RCurl                  1.98-1.3 2021-03-16 [1] CRAN (R 4.0.4)
##  readr                  1.4.0    2020-10-05 [1] CRAN (R 4.0.4)
##  remotes                2.3.0    2021-04-01 [1] CRAN (R 4.0.4)
##  rlang                  0.4.10   2020-12-30 [1] CRAN (R 4.0.4)
##  rmarkdown              2.7      2021-02-19 [1] CRAN (R 4.0.4)
##  rmdformats             1.0.1    2021-01-13 [1] CRAN (R 4.0.4)
##  robcor               * 0.1-6    2014-01-06 [1] CRAN (R 4.0.4)
##  rprojroot              2.0.2    2020-11-15 [1] CRAN (R 4.0.4)
##  RSQLite                2.2.7    2021-04-22 [1] CRAN (R 4.0.4)
##  S4Vectors              0.28.1   2020-12-09 [1] Bioconductor  
##  sass                   0.3.1    2021-01-24 [1] CRAN (R 4.0.4)
##  scales                 1.1.1    2020-05-11 [1] CRAN (R 4.0.4)
##  sessioninfo            1.1.1    2018-11-05 [1] CRAN (R 4.0.4)
##  SRAdb                  1.52.0   2020-10-27 [1] Bioconductor  
##  stringi                1.5.3    2020-09-09 [1] CRAN (R 4.0.4)
##  stringr                1.4.0    2019-02-10 [1] CRAN (R 4.0.4)
##  SummarizedExperiment   1.20.0   2020-10-27 [1] Bioconductor  
##  survival               3.2-11   2021-04-26 [1] CRAN (R 4.0.4)
##  testthat               3.0.2    2021-02-14 [1] CRAN (R 4.0.4)
##  tibble                 3.1.1    2021-04-18 [1] CRAN (R 4.0.4)
##  tidyr                  1.1.3    2021-03-03 [1] CRAN (R 4.0.4)
##  tidyselect             1.1.0    2020-05-11 [1] CRAN (R 4.0.4)
##  usethis                2.0.1    2021-02-10 [1] CRAN (R 4.0.4)
##  utf8                   1.2.1    2021-03-12 [1] CRAN (R 4.0.4)
##  vctrs                  0.3.7    2021-03-29 [1] CRAN (R 4.0.4)
##  viridis              * 0.6.0    2021-04-15 [1] CRAN (R 4.0.4)
##  viridisLite          * 0.4.0    2021-04-13 [1] CRAN (R 4.0.4)
##  withr                  2.4.2    2021-04-18 [1] CRAN (R 4.0.4)
##  xfun                   0.24     2021-06-15 [1] CRAN (R 4.0.4)
##  XML                    3.99-0.6 2021-03-16 [1] CRAN (R 4.0.4)
##  xml2                   1.3.2    2020-04-23 [1] CRAN (R 4.0.4)
##  xtable                 1.8-4    2019-04-21 [1] CRAN (R 4.0.4)
##  XVector                0.30.0   2020-10-27 [1] Bioconductor  
##  yaml                   2.2.1    2020-02-01 [1] CRAN (R 4.0.4)
##  zlibbioc               1.36.0   2020-10-27 [1] Bioconductor  
## 
## [1] /home/ubuntu/R/x86_64-pc-linux-gnu-library/4.0
## [2] /usr/local/lib/R/site-library
## [3] /usr/lib/R/site-library
## [4] /usr/lib/R/library